# Load library
library(dplyr)
library(survival)
library(janitor)
library(magrittr)
library(car)
library(ggplot2)
library(tidyverse)
library(broom)
library(MASS)
library(boot)
library(gtsummary)
library(knitr)
library(kableExtra)
library(gridExtra)
#print(citation("survival"), bibtex=TRUE)Cox regression modeling of survival after chemotherapy for colon cancer
Data: Survival after chemotherapy for Stage B/C colon cancer [1] [2]
Introduction
Description
These are data from one of the first successful trials of adjuvant chemotherapy for colon cancer. Levamisole is a low-toxicity compound previously used to treat worm infestations in animals; 5-FU is a moderately toxic (as these things go) chemotherapy agent. There are two records per person, one for recurrence and one for death.
The purpose of this project is to compare survival between the untreated (Obs) group vs those treated with amisole (Lev), or amisole + 5-FU.
Column names:
| id: | id |
| study: | 1 for all patients |
| rx: | Treatment - Obs(ervation), Lev(amisole), Lev(amisole)+5-FU |
| sex: | 1=male |
| age: | in years |
| obstruct: | obstruction of colon by tumour |
| perfor: | perforation of colon |
| adhere: | adherence to nearby organs |
| nodes: | number of lymph nodes with detectable cancer |
| time: | days until event or censoring |
| status: | censoring status |
| differ: | differentiation of tumour (1=well, 2=moderate, 3=poor) |
| extent: | Extent of local spread (1=submucosa, 2=muscle, 3=serosa, 4=contiguous structures) |
| surg: | time from surgery to registration (0=short, 1=long) |
| node4: | more than 4 positive lymph nodes |
| etype: | event type: 1=recurrence,2=death |
#Load data
colon <- as_tibble(colon)
head(colon)# A tibble: 6 × 16
id study rx sex age obstruct perfor adhere nodes status differ
<dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 Lev+5FU 1 43 0 0 0 5 1 2
2 1 1 Lev+5FU 1 43 0 0 0 5 1 2
3 2 1 Lev+5FU 1 63 0 0 0 1 0 2
4 2 1 Lev+5FU 1 63 0 0 0 1 0 2
5 3 1 Obs 0 71 0 0 1 7 1 2
6 3 1 Obs 0 71 0 0 1 7 1 2
# ℹ 5 more variables: extent <dbl>, surg <dbl>, node4 <dbl>, time <dbl>,
# etype <dbl>
Since the current analysis is focused on survival, filter data to death as the event type. This will create a data table with one row per individual.
colon_surv <- colon%>%filter(etype == 2) I. Exploratory data analysis
Check missing values
na_counts <- sapply(colon_surv, function(x)sum(is.na(x)))
na_counts id study rx sex age obstruct perfor adhere
0 0 0 0 0 0 0 0
nodes status differ extent surg node4 time etype
18 0 23 0 0 0 0 0
# replace NAs with mode
table(colon_surv$differ)
1 2 3
93 663 150
mode(colon_surv$differ)[1] "numeric"
median(colon_surv$nodes, na.rm= TRUE)[1] 2
colon_surv$differ <- if_else(is.na(colon_surv$differ), 2,colon_surv$differ)
colon_surv$nodes <- if_else(is.na(colon_surv$nodes), 2,colon_surv$nodes)Insight: only nodes and differ columns have NA values. Replacing the 23 NAs in differ column with mode, and replace NAs in nodes with median.
Evaluate continuous variables
# age
hist(colon_surv$age)hist(colon_surv$nodes)hist(colon_surv$time)Insight: Age is normally distributed. Number of nodes is skewed to the right. Time is fairly normally distributed with most the individuals having event time between 500-3000 days.
Evaluate nodes column to investigate outliers
t <- colon_surv%>%filter(node4 ==1) # samples with more than 4 positive lymph nodes
hist(t$nodes) Insight: samples with greater than 4 lymph nodes have less than 5 count in nodes column, so the two columns are not consistent. Therefore, nodes column will not be used for further analysis.
Evaluate categorical variables
summary_table <- colon_surv%>%summarise(count =n(),
male = sum(sex),
median_age = median(age),
ct_perforation = sum(perfor),
ct_adherence_nerby_organ = sum(adhere), censored = sum(status))
summary_table# A tibble: 1 × 6
count male median_age ct_perforation ct_adherence_nerby_organ censored
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 929 484 61 27 135 452
Insight: Total number of participants: 929. About half of the participants are male and about half were censored, while the other half died.
# rename categorical columns to make them descriptive
colon_surv <- colon_surv%>%mutate(differentiation = case_when(differ == 1 ~ "well",
differ == 2 ~ "moderate",
differ == 3 ~ "poor"),
local_spread = case_when(extent == 1 ~ "submucosa",
extent == 2 ~ "muscle",
extent == 3 ~ "serosa",
extent == 4 ~ "contiguous"),
surg_to_reg_time = case_when(surg == 0~ "short",
surg == 1 ~ "long"))Frequency tables for categorical variables
# frequency tables for categorical variables
# Tumor differentiation
colon_surv %>%
tabyl(differentiation, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() differentiation Obs Lev Lev+5FU
moderate 74.9% (236) 73.9% (229) 72.7% (221)
poor 16.5% (52) 14.2% (44) 17.8% (54)
well 8.6% (27) 11.9% (37) 9.5% (29)
# extent of local spread
colon_surv %>%
tabyl(local_spread, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() local_spread Obs Lev Lev+5FU
contiguous 6.3% (20) 3.9% (12) 3.6% (11)
muscle 12.1% (38) 11.6% (36) 10.5% (32)
serosa 79.0% (249) 83.5% (259) 82.6% (251)
submucosa 2.5% (8) 1.0% (3) 3.3% (10)
# colum obstruction
colon_surv %>%
tabyl(obstruct, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() obstruct Obs Lev Lev+5FU
0 80.0% (252) 79.7% (247) 82.2% (250)
1 20.0% (63) 20.3% (63) 17.8% (54)
# colon perforation
colon_surv %>%
tabyl(perfor, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() perfor Obs Lev Lev+5FU
0 97.1% (306) 96.8% (300) 97.4% (296)
1 2.9% (9) 3.2% (10) 2.6% (8)
# Adherance to nearby organs
colon_surv %>%
tabyl(adhere, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() adhere Obs Lev Lev+5FU
0 85.1% (268) 84.2% (261) 87.2% (265)
1 14.9% (47) 15.8% (49) 12.8% (39)
# extent of local tumor spread
colon_surv %>%
tabyl(local_spread, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() local_spread Obs Lev Lev+5FU
contiguous 6.3% (20) 3.9% (12) 3.6% (11)
muscle 12.1% (38) 11.6% (36) 10.5% (32)
serosa 79.0% (249) 83.5% (259) 82.6% (251)
submucosa 2.5% (8) 1.0% (3) 3.3% (10)
# More than 4 lymph nodes with cancer
colon_surv %>%
tabyl(node4, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() node4 Obs Lev Lev+5FU
0 72.4% (228) 71.3% (221) 74.0% (225)
1 27.6% (87) 28.7% (89) 26.0% (79)
# time from surgery to registration
colon_surv %>%
tabyl(surg, rx) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns() surg Obs Lev Lev+5FU
0 71.1% (224) 74.2% (230) 75.0% (228)
1 28.9% (91) 25.8% (80) 25.0% (76)
Summary statistics grouped by treatment
summary_table <- colon_surv%>%group_by(rx)%>%summarise(count =n(),
male = sum(sex),
median_age = median(age),
ct_perforation = sum(perfor),
ct_adherence_nerby_organ = sum(adhere),
perc_male = (male/count)*100,
iqr_age = IQR(age))
summary_table# A tibble: 3 × 8
rx count male median_age ct_perforation ct_adherence_nerby_o…¹ perc_male
<fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Obs 315 166 60 9 47 52.7
2 Lev 310 177 61 10 49 57.1
3 Lev+5FU 304 141 62 8 39 46.4
# ℹ abbreviated name: ¹ct_adherence_nerby_organ
# ℹ 1 more variable: iqr_age <dbl>
Insight: Each treatment group had about 300 participants. Median age, number of participants with perforation and adherence are similar between the three groups.
II. Table 1: Description of the study population
| Observation (%) | Amisole (%) | Amisole + 5-FU (%) | ||
|---|---|---|---|---|
| N=315 | N=310 | N=304 | ||
| Demographics | ||||
| Male | 166 (52.3) | 177 (57.1) | 141 | |
| Median age (years) [IQR] | 60 [53,68] | 61 [53,69] | 61 [52,70] | |
| Cancer characteristics | ||||
| Colon obstruction | 63 (20.0) | 63 (20.3) | 54 (17.8) | |
| Colon perforation | 9 (2.9) | 10 (3.2) | 8 (2.6) | |
| Adherence to nearby organs | 47 (14.9) | 49 (15.8) | 39 (12.8) | |
| Differentiation of tumor | ||||
| Well | 27 (8.6) | 37 (11.9) | 29 (9.5) | |
| Moderate | 236 (74.9) | 229 (73.9) | 221 (72.7) | |
| Poor | 52 (16.5) | 44 (14.2) | 54 (17.8) | |
| Extent of local spread | ||||
| Contiguous | 20 (6.3) | 12 (3.9) | 11 (3.6) | |
| Muscle | 38 (12.1) | 36 (11.6) | 32 (10.5) | |
| Serosa | 249 (79.0) | 259 (83.5) | 251 (82.6) | |
| Submucosa | 8 (2.5) | 3 (1.0) | 10 (3.3) | |
| More than 4 lymph nodes with cancer | Yes | 87 (27.6) | 89 (28.7) | 79 (26.0) |
| Short time from surgery to registration (%) | Yes | 91 (28.9) | 80 (25.8) | 76 (25.0) |
III. Methods
The Cox proportional hazards model was used to model the relationship between survival time and different lung cancer treatments. In particlular the survival time will be compared between the untreated group (observation) vs. those treated with amisole (Lev), or amisole + 5-FU. The Cox regression model was chosen for this study because it is useful for studying association between survival time of patients and predictors and allows estimating the relative risk or hazard ratios due to the covariates, i.e., treatment status. The time (in days) until event, i.e, death, will be modeled as a function of treatment and other variables, including age, sex, and various tumor characteristics. Significant predictors were included in the final model.
Statistical analysis
The R statistical software version 4.3.2 [3] was used for all analysis. The Survival package was used to construct the Cox regression model [2] [1].
Cox regression model is based on the hazard function \(h_x(t)\) with covariates at time t given by:
\(h_x(t)=h_0(t)\exp(\beta_1x_1 +\beta_2x_2 + \dots + \beta_p x_p)\)
Where:
\(h_x(t)\) is the hazard function
\(h_0(t)\) is the baseline hazard function
\(\beta_1x_1 + \beta_2x_2 + \dots +\beta_p x_p\) represent the linear combination of covariates and their coefficient
The hazards for the observation vs. amisole (Lev), or amisole + 5-FU group with covariate values x1 and x2 are given by: \(hx_1(t)=h_0(t)\exp(\beta_1x_1)\) and \(hx_2(t)=h_0(t)\exp(\beta_2x_2)\), respectively
The hazard ratio is expressed as: HR = \(hx_2(t)\) / \(hx_1(t)\) = \(\exp[\beta(x_2-x_1)]\)
The Schoenfeld residual plot was constructed to test Cox proportional hazards assumption. When the proportional hazards assumpiton was not met for any of the covariates, stratification approach was explored. The Survminer [4] package was used to plot the Kaplan-Meier curve to visualize the survival probability over time for each treatment group.
Multicolinearity was tested using Variant Inflation Factor (VIF) calculated using MASS package [5].
The R MASS package was used for Stepwise model selection, using “both” forward and backward variable selection [5]. For Stepwise selection, stepAIC() function uses AIC (Akaike Information Criterion) as the measure to add or remove predictors from the model. Model performance was evaluated using 100-fold cross-validation using the boot package [6] [7].
IV. Analysis: Cox regression model
Survival curve
# Create new incremental count id
colon_surv$idcount <- c(1:length(colon_surv$id))
# Order by survival time and create an order variable:
colon_surv <- colon_surv[order(-colon_surv$time, colon_surv$status),]
colon_surv$order <- c(1:length(colon_surv$idcount))
ggplot(data=colon_surv, aes(x=time, y=order)) +
geom_rect(xmin=23,xmax=colon_surv$time,ymin=colon_surv$order,ymax=colon_surv$order+1, colour="lightgray") +
geom_rect(xmin=colon_surv$time-2,xmax=colon_surv$time,ymin=colon_surv$order,ymax=colon_surv$order+1,
fill=factor(colon_surv$status+1)) +
geom_vline(xintercept= 1976,linetype="solid") +
scale_x_continuous(breaks=seq(20,3330,650)) +
geom_text(aes(2600, 750, label="Median Survival Time")) +
xlab("Survival Time (Days)") + ylab("Participants (ordered by survival time)") +
ggtitle("Survival Times for Participant") +
theme_classic() +
theme(legend.position="none",
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.background=element_blank(),
axis.line.x = element_line(color = "black"),
axis.line.y = element_line(color = "black"))Survival curve stratified by treatment group
library(survminer)
library(survival)
# Estimate the median survival time among the three groups
survfit(Surv(time,status) ~ rx, data = colon_surv)Call: survfit(formula = Surv(time, status) ~ rx, data = colon_surv)
n events median 0.95LCL 0.95UCL
rx=Obs 315 168 2083 1656 2789
rx=Lev 310 161 2152 1540 NA
rx=Lev+5FU 304 123 NA 2725 NA
# count the number of events after 2080 days, which is the median survival time among the observation group
tt <- colon_surv%>%filter(time > 2083)%>% group_by(rx)%>%summarise(ct = n(),
death = sum(status))
# Plot survival curve
fit <- survfit(Surv(time,status) ~ rx, data = colon_surv)
ggsurvplot(fit, data=colon_surv, risk.table = TRUE)# Estimate the probability of surviving beyond 3000 days
summary(survfit(Surv(time, status) ~ rx, data = colon_surv), times = 3000)Call: survfit(formula = Surv(time, status) ~ rx, data = colon_surv)
rx=Obs
time n.risk n.event survival std.err lower 95% CI
3.00e+03 6.00e+00 1.68e+02 4.08e-01 3.97e-02 3.37e-01
upper 95% CI
4.94e-01
rx=Lev
time n.risk n.event survival std.err lower 95% CI
3.00e+03 4.00e+00 1.61e+02 3.92e-01 5.63e-02 2.96e-01
upper 95% CI
5.20e-01
rx=Lev+5FU
time n.risk n.event survival std.err lower 95% CI
3.00e+03 7.00e+00 1.23e+02 5.61e-01 3.43e-02 4.97e-01
upper 95% CI
6.32e-01
# compare significant differences in survival times between the three groups
survdiff(Surv(time, status)~ rx, data = colon_surv)Call:
survdiff(formula = Surv(time, status) ~ rx, data = colon_surv)
N Observed Expected (O-E)^2/E (O-E)^2/V
rx=Obs 315 168 148 2.58 3.85
rx=Lev 310 161 146 1.52 2.25
rx=Lev+5FU 304 123 157 7.55 11.62
Chisq= 11.7 on 2 degrees of freedom, p= 0.003
Insight: Based on the survival curve, the mediant survival time for the observation group is 2083 days. However, the median survival of Lev and Lev+5Fu group cannot be estimated, because there are too few events after 2083 days, which is the median survival time in the observation group.
The time for 50% survival probability of the group treated with Lev+5Fu is over 3000 days while the survival time for the observation and Lev group is around 2080 days. The probability of surviving to 3000 days among the Lev+5FU group is 56% (95% CI: 50-63), compared to 41% among the observation group.
The survival time is significantly different (P=0.003) between the three groups.
Insight: None of the variables have VIF values above 5, therefore there is no multicollinearity
Cox regression models
# Subset data for modeling
df <- colon_surv%>%dplyr::select(!c(id,study,etype,differ, extent,surg_to_reg_time, idcount, order, nodes))Base Model
m0 <- coxph(Surv(time, status) ~ 1, data = df)
summary_m0 = summary(m0)
c_index_m0 <- concordance(m0)
cat("Concordance of the base model:",c_index_m0$concordance)Concordance of the base model: 0.5
# Calculate the baseline hazard function
baseline_hazard <- basehaz(m0, centered = FALSE)
# Print the baseline hazard
print(baseline_hazard) hazard time
1 0.001076426 23
2 0.002154012 24
3 0.003232761 34
4 0.004312675 45
5 0.005393756 52
6 0.006476007 56
7 0.007559431 79
8 0.008644029 93
9 0.009729806 113
10 0.010816762 122
11 0.011904901 125
12 0.012994226 127
13 0.014084739 129
14 0.015176442 133
15 0.016269338 138
16 0.017363430 141
17 0.018458720 144
18 0.019555211 145
19 0.020652906 150
20 0.021751807 164
21 0.022851917 165
22 0.023953239 166
23 0.026159527 171
24 0.027264500 186
25 0.028370694 187
26 0.029478114 191
27 0.030586761 201
28 0.031696639 206
29 0.032807751 208
30 0.033920098 215
31 0.035033683 218
32 0.037264582 219
33 0.038381900 222
34 0.039500469 226
35 0.040620289 232
36 0.041741366 238
37 0.042863700 241
38 0.043987296 242
39 0.045112155 251
40 0.046238281 253
41 0.047365677 257
42 0.049624289 259
43 0.050755510 264
44 0.051888013 269
45 0.053021800 271
46 0.054156874 274
47 0.055293237 275
48 0.056430894 276
49 0.057569846 279
50 0.059851649 283
51 0.060994506 289
52 0.062138671 293
53 0.063284147 302
54 0.064430936 304
55 0.065579041 311
56 0.066728466 313
57 0.069031288 314
58 0.070184691 316
59 0.071339425 322
60 0.072495495 323
61 0.073652902 324
62 0.074811650 326
63 0.075971743 331
64 0.077133183 340
65 0.078295974 342
66 0.079460119 343
67 0.080625620 349
68 0.082960705 355
69 0.084130296 356
70 0.085301256 362
71 0.086473589 363
72 0.087647298 365
73 0.088822386 366
74 0.089998857 372
75 0.091176713 376
76 0.092355958 381
77 0.093536596 382
78 0.095902061 384
79 0.097086895 389
80 0.098273134 390
81 0.099460783 400
82 0.100649844 402
83 0.101840320 406
84 0.103032215 409
85 0.104225532 411
86 0.106616448 413
87 0.107814052 417
88 0.109013093 420
89 0.110213573 421
90 0.111415496 422
91 0.112618866 428
92 0.115029958 430
93 0.116237687 433
94 0.117446877 437
95 0.119869652 438
96 0.121083244 439
97 0.122298311 441
98 0.123514856 443
99 0.124732883 444
100 0.125952395 448
101 0.125952395 453
102 0.127174889 454
103 0.128398879 459
104 0.129624369 460
105 0.130851363 462
106 0.132079865 464
107 0.134541404 465
108 0.135774450 469
109 0.137009017 472
110 0.138245111 474
111 0.139482735 475
112 0.140721893 484
113 0.141962587 485
114 0.143204823 486
115 0.144448604 490
116 0.145693934 498
117 0.149439257 499
118 0.150690821 503
119 0.151943954 506
120 0.153198659 510
121 0.154454941 512
122 0.155712802 522
123 0.156972248 528
124 0.158233282 529
125 0.159495908 537
126 0.160760131 546
127 0.162025954 550
128 0.163293381 553
129 0.164562416 559
130 0.167105329 563
131 0.168379214 569
132 0.169654724 570
133 0.170931864 573
134 0.173491046 576
135 0.174773097 578
136 0.177342141 580
137 0.178629142 582
138 0.179917802 583
139 0.181208125 587
140 0.182500115 589
141 0.183793776 591
142 0.185089112 592
143 0.186386129 594
144 0.187684831 595
145 0.188985221 599
146 0.190287304 601
147 0.192896568 602
148 0.194203758 603
149 0.195512658 608
150 0.196823274 609
151 0.198135610 612
152 0.199449670 614
153 0.200765460 616
154 0.202082983 622
155 0.203402244 628
156 0.204723248 629
157 0.206045999 641
158 0.208696763 642
159 0.211354571 643
160 0.212686129 647
161 0.214019462 659
162 0.215354576 663
163 0.216691474 664
164 0.218030162 665
165 0.219370645 666
166 0.220712927 669
167 0.222057013 670
168 0.223402908 673
169 0.224750617 674
170 0.226100144 675
171 0.227451496 678
172 0.228804676 684
173 0.230159689 685
174 0.231516541 687
175 0.235598179 692
176 0.236962436 693
177 0.238328556 696
178 0.239696545 706
179 0.241066408 708
180 0.243811776 709
181 0.245187292 712
182 0.246564703 716
183 0.247944013 717
184 0.249325228 718
185 0.250708354 720
186 0.252093396 721
187 0.253480358 723
188 0.254869247 729
189 0.256260068 730
190 0.257652826 736
191 0.259047526 739
192 0.261842775 743
193 0.263243335 753
194 0.264645860 755
195 0.266050354 758
196 0.268865275 759
197 0.270275712 760
198 0.271688141 761
199 0.273102569 764
200 0.274518999 765
201 0.275937439 766
202 0.277357893 770
203 0.278780369 774
204 0.280204870 775
205 0.281631403 795
206 0.283059975 797
207 0.285923255 802
208 0.288794757 806
209 0.290233606 811
210 0.291674528 832
211 0.294562616 833
212 0.296009794 840
213 0.297459069 844
214 0.298910448 845
215 0.300363936 846
216 0.301819541 854
217 0.303277266 858
218 0.304737121 862
219 0.306199109 863
220 0.307663238 874
221 0.309129513 875
222 0.310597942 883
223 0.312068530 884
224 0.313541284 885
225 0.317972605 887
226 0.319454087 890
227 0.320937766 901
228 0.322423651 902
229 0.325402059 905
230 0.326894596 909
231 0.328389364 911
232 0.329886370 916
233 0.331385621 924
234 0.332887122 928
235 0.334390882 929
236 0.335896906 936
237 0.337405201 938
238 0.338915775 939
239 0.340428635 940
240 0.341943786 942
241 0.343461237 944
242 0.344980994 949
243 0.346503064 952
244 0.348027454 957
245 0.354148359 961
246 0.355684458 963
247 0.357222919 966
248 0.358763751 968
249 0.360306961 969
250 0.361852556 976
251 0.363400544 977
252 0.364950931 986
253 0.366503727 993
254 0.369616569 997
255 0.371176631 1018
256 0.372739131 1021
257 0.374304076 1022
258 0.375871475 1031
259 0.377441333 1034
260 0.379013660 1037
261 0.380588464 1041
262 0.382165751 1046
263 0.383745529 1048
264 0.385327808 1055
265 0.386912594 1061
266 0.388499896 1070
267 0.390089721 1079
268 0.391682077 1083
269 0.393276974 1092
270 0.394874418 1101
271 0.396474418 1103
272 0.398076982 1105
273 0.399682118 1112
274 0.401289835 1117
275 0.402900141 1122
276 0.404513045 1133
277 0.406128553 1134
278 0.407746676 1135
279 0.409367422 1136
280 0.410990799 1138
281 0.412616815 1139
282 0.415876801 1145
283 0.417510788 1151
284 0.419147449 1154
285 0.420786793 1159
286 0.422428829 1161
287 0.424073566 1166
288 0.427371178 1178
289 0.429024070 1186
290 0.430679699 1191
291 0.432338074 1193
292 0.433999204 1195
293 0.435663097 1198
294 0.437329764 1201
295 0.438999213 1207
296 0.440671454 1209
297 0.442346496 1212
298 0.444024348 1215
299 0.445705020 1216
300 0.447388522 1219
301 0.449074862 1230
302 0.450764052 1237
303 0.454151014 1246
304 0.455848807 1252
305 0.459253065 1262
306 0.460959550 1272
307 0.462668951 1273
308 0.466096546 1276
309 0.467814759 1279
310 0.469538897 1290
311 0.472996116 1295
312 0.474729219 1302
313 0.476465330 1304
314 0.478204460 1306
315 0.479946621 1313
316 0.481691821 1314
317 0.483440073 1325
318 0.485191386 1327
319 0.486945772 1363
320 0.488703242 1365
321 0.490463805 1375
322 0.492227473 1387
323 0.493994258 1388
324 0.495764169 1399
325 0.497537219 1405
326 0.497537219 1421
327 0.499316579 1424
328 0.502884824 1434
329 0.504673733 1437
330 0.506465847 1439
331 0.508261180 1446
332 0.510059741 1447
333 0.510059741 1474
334 0.511864795 1482
335 0.513673113 1495
336 0.515484707 1509
337 0.517299589 1511
338 0.519117771 1521
339 0.520939265 1530
340 0.520939265 1537
341 0.522767418 1540
342 0.526433783 1548
343 0.528272018 1550
344 0.530113639 1568
345 0.531958657 1607
346 0.533807086 1620
347 0.535658938 1637
348 0.537514225 1652
349 0.539372961 1656
350 0.541235159 1668
351 0.543100830 1671
352 0.544969989 1679
353 0.546842648 1692
354 0.548718821 1709
355 0.550598520 1723
356 0.552481759 1745
357 0.554368552 1752
358 0.556258911 1767
359 0.558152850 1768
360 0.560050384 1772
361 0.561951524 1783
362 0.563856286 1788
363 0.565764683 1790
364 0.565764683 1792
365 0.567680392 1798
366 0.567680392 1800
367 0.567680392 1803
368 0.569607174 1812
369 0.569607174 1814
370 0.571545159 1818
371 0.571545159 1819
372 0.571545159 1820
373 0.571545159 1823
374 0.571545159 1827
375 0.571545159 1828
376 0.573513663 1829
377 0.575486049 1831
378 0.575486049 1838
379 0.577466247 1839
380 0.579450374 1850
381 0.581438446 1851
382 0.581438446 1852
383 0.581438446 1853
384 0.581438446 1855
385 0.583442454 1856
386 0.583442454 1861
387 0.583442454 1864
388 0.583442454 1870
389 0.585466745 1875
390 0.587499266 1879
391 0.589540082 1884
392 0.591585072 1885
393 0.591585072 1891
394 0.593638460 1896
395 0.593638460 1902
396 0.593638460 1905
397 0.595713149 1907
398 0.595713149 1913
399 0.597796482 1915
400 0.597796482 1918
401 0.597796482 1929
402 0.599901745 1932
403 0.599901745 1935
404 0.599901745 1939
405 0.599901745 1944
406 0.602024887 1950
407 0.602024887 1964
408 0.602024887 1968
409 0.602024887 1969
410 0.602024887 1971
411 0.602024887 1976
412 0.602024887 1980
413 0.602024887 1981
414 0.602024887 1985
415 0.602024887 1990
416 0.602024887 1992
417 0.604203537 1995
418 0.604203537 1997
419 0.604203537 2001
420 0.604203537 2006
421 0.604203537 2008
422 0.604203537 2018
423 0.604203537 2020
424 0.606430708 2021
425 0.608662851 2023
426 0.608662851 2025
427 0.608662851 2029
428 0.608662851 2030
429 0.608662851 2043
430 0.608662851 2047
431 0.608662851 2050
432 0.610935578 2052
433 0.610935578 2055
434 0.610935578 2058
435 0.610935578 2065
436 0.610935578 2066
437 0.610935578 2070
438 0.610935578 2072
439 0.610935578 2076
440 0.613277499 2077
441 0.615624916 2079
442 0.617977858 2083
443 0.617977858 2084
444 0.620341924 2085
445 0.620341924 2093
446 0.620341924 2097
447 0.620341924 2099
448 0.620341924 2100
449 0.620341924 2101
450 0.620341924 2103
451 0.620341924 2107
452 0.620341924 2110
453 0.620341924 2111
454 0.620341924 2113
455 0.620341924 2114
456 0.620341924 2117
457 0.620341924 2118
458 0.620341924 2120
459 0.620341924 2122
460 0.620341924 2126
461 0.622841924 2127
462 0.625348190 2128
463 0.625348190 2129
464 0.625348190 2130
465 0.625348190 2132
466 0.627886261 2133
467 0.627886261 2136
468 0.627886261 2138
469 0.627886261 2139
470 0.627886261 2142
471 0.627886261 2144
472 0.627886261 2147
473 0.627886261 2148
474 0.627886261 2149
475 0.630504062 2152
476 0.630504062 2153
477 0.630504062 2154
478 0.630504062 2156
479 0.630504062 2157
480 0.630504062 2158
481 0.630504062 2161
482 0.630504062 2162
483 0.630504062 2164
484 0.630504062 2165
485 0.630504062 2167
486 0.630504062 2169
487 0.630504062 2170
488 0.636021314 2171
489 0.636021314 2173
490 0.638799091 2174
491 0.638799091 2176
492 0.638799091 2179
493 0.638799091 2180
494 0.638799091 2181
495 0.638799091 2183
496 0.638799091 2184
497 0.638799091 2185
498 0.638799091 2186
499 0.638799091 2187
500 0.638799091 2188
501 0.638799091 2189
502 0.638799091 2190
503 0.638799091 2191
504 0.638799091 2192
505 0.638799091 2194
506 0.638799091 2195
507 0.641793103 2197
508 0.641793103 2198
509 0.641793103 2200
510 0.641793103 2202
511 0.641793103 2203
512 0.641793103 2204
513 0.641793103 2207
514 0.641793103 2209
515 0.641793103 2210
516 0.641793103 2212
517 0.644927900 2213
518 0.644927900 2219
519 0.644927900 2221
520 0.644927900 2222
521 0.644927900 2227
522 0.644927900 2229
523 0.644927900 2231
524 0.644927900 2232
525 0.644927900 2234
526 0.644927900 2235
527 0.644927900 2237
528 0.644927900 2240
529 0.644927900 2242
530 0.644927900 2244
531 0.644927900 2245
532 0.644927900 2250
533 0.644927900 2252
534 0.644927900 2254
535 0.644927900 2255
536 0.648329260 2257
537 0.648329260 2261
538 0.648329260 2263
539 0.648329260 2264
540 0.648329260 2265
541 0.648329260 2267
542 0.648329260 2269
543 0.648329260 2270
544 0.648329260 2274
545 0.648329260 2277
546 0.648329260 2278
547 0.648329260 2279
548 0.648329260 2280
549 0.651926382 2284
550 0.655549571 2287
551 0.655549571 2289
552 0.655549571 2290
553 0.655549571 2292
554 0.655549571 2294
555 0.655549571 2299
556 0.655549571 2300
557 0.655549571 2303
558 0.655549571 2304
559 0.655549571 2309
560 0.655549571 2310
561 0.655549571 2312
562 0.655549571 2313
563 0.655549571 2316
564 0.659395725 2318
565 0.659395725 2321
566 0.659395725 2323
567 0.659395725 2324
568 0.659395725 2325
569 0.659395725 2326
570 0.659395725 2330
571 0.659395725 2331
572 0.659395725 2332
573 0.659395725 2339
574 0.659395725 2343
575 0.659395725 2345
576 0.659395725 2347
577 0.659395725 2349
578 0.659395725 2350
579 0.663545102 2351
580 0.663545102 2352
581 0.663545102 2353
582 0.663545102 2355
583 0.663545102 2356
584 0.663545102 2360
585 0.663545102 2362
586 0.663545102 2364
587 0.663545102 2365
588 0.663545102 2375
589 0.663545102 2378
590 0.663545102 2380
591 0.663545102 2381
592 0.663545102 2384
593 0.663545102 2385
594 0.663545102 2386
595 0.663545102 2387
596 0.663545102 2391
597 0.663545102 2393
598 0.663545102 2394
599 0.663545102 2396
600 0.663545102 2408
601 0.663545102 2410
602 0.663545102 2413
603 0.663545102 2414
604 0.663545102 2416
605 0.663545102 2422
606 0.663545102 2423
607 0.663545102 2429
608 0.663545102 2435
609 0.663545102 2439
610 0.663545102 2441
611 0.663545102 2442
612 0.663545102 2444
613 0.663545102 2445
614 0.663545102 2447
615 0.663545102 2449
616 0.663545102 2450
617 0.663545102 2456
618 0.668647143 2458
619 0.668647143 2460
620 0.668647143 2461
621 0.668647143 2467
622 0.668647143 2472
623 0.668647143 2474
624 0.668647143 2481
625 0.674023487 2482
626 0.674023487 2484
627 0.674023487 2485
628 0.674023487 2486
629 0.674023487 2487
630 0.674023487 2488
631 0.674023487 2490
632 0.674023487 2495
633 0.674023487 2496
634 0.674023487 2497
635 0.674023487 2501
636 0.674023487 2503
637 0.674023487 2505
638 0.674023487 2506
639 0.674023487 2507
640 0.674023487 2509
641 0.674023487 2513
642 0.674023487 2517
643 0.674023487 2520
644 0.674023487 2525
645 0.680352601 2527
646 0.680352601 2528
647 0.680352601 2530
648 0.680352601 2532
649 0.680352601 2538
650 0.680352601 2540
651 0.680352601 2541
652 0.687109358 2542
653 0.687109358 2544
654 0.687109358 2545
655 0.687109358 2547
656 0.687109358 2548
657 0.687109358 2551
658 0.694151611 2552
659 0.694151611 2555
660 0.694151611 2558
661 0.694151611 2562
662 0.694151611 2565
663 0.694151611 2568
664 0.694151611 2572
665 0.694151611 2574
666 0.694151611 2577
667 0.694151611 2580
668 0.694151611 2588
669 0.694151611 2590
670 0.694151611 2592
671 0.702088119 2593
672 0.702088119 2598
673 0.702088119 2600
674 0.702088119 2613
675 0.702088119 2618
676 0.702088119 2621
677 0.702088119 2628
678 0.702088119 2630
679 0.702088119 2631
680 0.702088119 2648
681 0.702088119 2651
682 0.702088119 2653
683 0.702088119 2656
684 0.702088119 2663
685 0.702088119 2668
686 0.702088119 2672
687 0.702088119 2674
688 0.702088119 2676
689 0.702088119 2678
690 0.702088119 2679
691 0.702088119 2682
692 0.711989109 2683
693 0.711989109 2688
694 0.711989109 2690
695 0.711989109 2691
696 0.711989109 2695
697 0.711989109 2697
698 0.711989109 2699
699 0.711989109 2703
700 0.711989109 2706
701 0.711989109 2708
702 0.711989109 2709
703 0.711989109 2711
704 0.711989109 2713
705 0.711989109 2716
706 0.723483362 2718
707 0.723483362 2720
708 0.723483362 2723
709 0.723483362 2724
710 0.735678484 2725
711 0.735678484 2726
712 0.735678484 2730
713 0.735678484 2731
714 0.735678484 2732
715 0.735678484 2737
716 0.735678484 2739
717 0.735678484 2740
718 0.735678484 2743
719 0.735678484 2747
720 0.735678484 2754
721 0.735678484 2760
722 0.735678484 2761
723 0.735678484 2764
724 0.735678484 2765
725 0.735678484 2772
726 0.735678484 2775
727 0.735678484 2779
728 0.735678484 2781
729 0.753535627 2789
730 0.753535627 2794
731 0.753535627 2800
732 0.753535627 2802
733 0.753535627 2815
734 0.753535627 2820
735 0.753535627 2821
736 0.753535627 2826
737 0.753535627 2828
738 0.753535627 2835
739 0.753535627 2840
740 0.753535627 2842
741 0.753535627 2849
742 0.753535627 2862
743 0.753535627 2869
744 0.753535627 2871
745 0.753535627 2883
746 0.753535627 2889
747 0.753535627 2899
748 0.753535627 2901
749 0.753535627 2905
750 0.753535627 2908
751 0.785793692 2910
752 0.785793692 2913
753 0.785793692 2915
754 0.785793692 2916
755 0.785793692 2918
756 0.785793692 2925
757 0.785793692 2927
758 0.785793692 2941
759 0.785793692 2950
760 0.785793692 2951
761 0.785793692 2958
762 0.785793692 2969
763 0.785793692 2985
764 0.785793692 3000
765 0.785793692 3017
766 0.785793692 3019
767 0.785793692 3024
768 0.785793692 3030
769 0.785793692 3078
770 0.785793692 3085
771 0.785793692 3087
772 0.785793692 3173
773 0.785793692 3185
774 0.785793692 3192
775 0.785793692 3214
776 0.785793692 3238
777 0.785793692 3308
778 0.785793692 3309
779 0.785793692 3325
780 0.785793692 3329
# Plot the cumulative baseline hazard function
ggplot(baseline_hazard, aes(x = time, y = hazard)) +
geom_step() +
labs(title = "Cumulative Baseline Hazard Function",
x = "Time",
y = "Cumulative Baseline Hazard") +
theme_minimal()Univariate analysis
#| echo: true
#| message: false
#| warning: false
# Univariate analysis
m1 <- coxph(Surv(time, status) ~ rx, data = df)
summary(m1)Call:
coxph(formula = Surv(time, status) ~ rx, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.02664 0.97371 0.11030 -0.241 0.80917
rxLev+5FU -0.37171 0.68955 0.11875 -3.130 0.00175 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9737 1.027 0.7844 1.2087
rxLev+5FU 0.6896 1.450 0.5464 0.8703
Concordance= 0.536 (se = 0.013 )
Likelihood ratio test= 12.15 on 2 df, p=0.002
Wald test = 11.56 on 2 df, p=0.003
Score (logrank) test = 11.68 on 2 df, p=0.003
c_index_m1 <- concordance(m1)
cat("Concordance of the univariate model:",c_index_m1$concordance)Concordance of the univariate model: 0.535769
anova(m0, m1) # Addition of rx variable significantly improved base modelAnalysis of Deviance Table
Cox model: response is Surv(time, status)
Model 1: ~ 1
Model 2: ~ rx
loglik Chisq Df Pr(>|Chi|)
1 -2930.2
2 -2924.1 12.148 2 0.002302 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Insight: the coefficient of Lev is not significant, suggesting that there is no evidence that this treatment affects survival time compared to observation. however Lev+5Fu is significant (p=0.00175), indicating that the treatment Lev +5Fu has a statistically significant effect on survival time compared to the reference group. The negative sign indicates that this treatment group has a lower hazard and likely a longer survival time.
The hazard ratio for Lex+5FU (0.690), indicating the risk of death is about 31% lower compared to the observation group.
The p-values indicate that the model is significant.
Test the Cox proportional hazard assumption of m1
cox.zph(m1) chisq df p
rx 1.48 2 0.48
GLOBAL 1.48 2 0.48
zph_test <- cox.zph(m1)
print(zph_test) chisq df p
rx 1.48 2 0.48
GLOBAL 1.48 2 0.48
# plot the Schoenfeld residuals
plot(zph_test)Insight: The Schoenfeld residal plot shows that the residuals are scattered randomly and the smooth trend line is horizontal near 0. This suggests that the hazard ratio for rx (treatment status) is constant over time and the proportional hazard assumption is met. The global p-value is >0.05, indicating that the the assumption is met.
Multivariate analysis
# Include all variables to determine which predictors are significant.
names(df) [1] "rx" "sex" "age" "obstruct"
[5] "perfor" "adhere" "status" "surg"
[9] "node4" "time" "differentiation" "local_spread"
# multivariate analysis
m2 <- coxph(Surv(time, status) ~ rx+ age + sex + perfor + adhere + surg + obstruct + differentiation + node4+
local_spread, data = df)
summary(m2)Call:
coxph(formula = Surv(time, status) ~ rx + age + sex + perfor +
adhere + surg + obstruct + differentiation + node4 + local_spread,
data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.0160445 0.9840835 0.1115300 -0.144 0.885612
rxLev+5FU -0.3742695 0.6877915 0.1196533 -3.128 0.001760 **
age 0.0070459 1.0070708 0.0040596 1.736 0.082633 .
sex 0.0412783 1.0421421 0.0952633 0.433 0.664791
perfor 0.0005371 1.0005373 0.2695697 0.002 0.998410
adhere 0.1689086 1.1840119 0.1295242 1.304 0.192210
surg 0.2362177 1.2664500 0.1032961 2.287 0.022207 *
obstruct 0.2865577 1.3318350 0.1173103 2.443 0.014576 *
differentiationpoor 0.3579964 1.4304604 0.1222148 2.929 0.003398 **
differentiationwell 0.0812878 1.0846830 0.1662752 0.489 0.624930
node4 0.9353140 2.5480134 0.0988712 9.460 < 2e-16 ***
local_spreadmuscle -0.9503933 0.3865890 0.2554577 -3.720 0.000199 ***
local_spreadserosa -0.4526719 0.6359268 0.1986566 -2.279 0.022687 *
local_spreadsubmucosa -1.2432513 0.2884449 0.5396100 -2.304 0.021224 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9841 1.0162 0.7909 1.2245
rxLev+5FU 0.6878 1.4539 0.5440 0.8696
age 1.0071 0.9930 0.9991 1.0151
sex 1.0421 0.9596 0.8646 1.2561
perfor 1.0005 0.9995 0.5899 1.6970
adhere 1.1840 0.8446 0.9186 1.5262
surg 1.2664 0.7896 1.0343 1.5507
obstruct 1.3318 0.7508 1.0583 1.6761
differentiationpoor 1.4305 0.6991 1.1258 1.8176
differentiationwell 1.0847 0.9219 0.7830 1.5026
node4 2.5480 0.3925 2.0991 3.0929
local_spreadmuscle 0.3866 2.5867 0.2343 0.6378
local_spreadserosa 0.6359 1.5725 0.4308 0.9387
local_spreadsubmucosa 0.2884 3.4669 0.1002 0.8306
Concordance= 0.674 (se = 0.012 )
Likelihood ratio test= 147 on 14 df, p=<2e-16
Wald test = 150.3 on 14 df, p=<2e-16
Score (logrank) test = 161.3 on 14 df, p=<2e-16
c_index_m2 <- concordance(m2)
cat("Concordance of the multivariate model:",c_index_m2$concordance)Concordance of the multivariate model: 0.6736097
# Determine significant predictors
anova(m2)Analysis of Deviance Table
Cox model: response is Surv(time, status)
Terms added sequentially (first to last)
loglik Chisq Df Pr(>|Chi|)
NULL -2930.2
rx -2924.1 12.1478 2 0.0023022 **
age -2923.9 0.3443 1 0.5573434
sex -2923.9 0.0000 1 0.9964141
perfor -2923.8 0.3345 1 0.5630435
adhere -2921.3 5.0032 1 0.0253001 *
surg -2919.1 4.3247 1 0.0375630 *
obstruct -2916.9 4.4808 1 0.0342771 *
differentiation -2909.8 14.0968 2 0.0008688 ***
node4 -2865.5 88.5800 1 < 2.2e-16 ***
local_spread -2856.7 17.6698 3 0.0005145 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m1, m2)Analysis of Deviance Table
Cox model: response is Surv(time, status)
Model 1: ~ 1
Model 2: ~ rx
Model 3: ~ rx + age + sex + perfor + adhere + surg + obstruct + differentiation + node4 + local_spread
loglik Chisq Df Pr(>|Chi|)
1 -2930.2
2 -2924.1 12.148 2 0.002302 **
3 -2856.7 134.834 12 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Insight: When all variables are included in the model, the anova test indicates that rx, adhere, surg, obstruct, differentiation, node4 and local spread are significant predictors. Additionally, model concordance did not improve when removing predictors that were not significant in m2
The concordance of the multivariable model, 0.674, is higher than the univariate model (m1, concordance =0.53), suggesting that the multivariate model is a better fit model.
Evaluate multicollinearity using Variance Inflation Factor (VIF)
vif <- vif(m2)
print(vif) GVIF Df GVIF^(1/(2*Df))
rx 1.035127 2 1.008668
age 1.042883 1 1.021217
sex 1.022620 1 1.011247
perfor 1.053420 1 1.026363
adhere 1.092721 1 1.045333
surg 1.013876 1 1.006914
obstruct 1.055334 1 1.027295
differentiation 1.062714 2 1.015323
node4 1.046133 1 1.022806
local_spread 1.091169 3 1.014648
Evaluate significance of predictors. Model survival while including different cancer characteristics as predictors separately to identify significance predictors.
# model including all variables
m2 <- coxph(Surv(time, status) ~ rx+ age + sex + perfor + adhere + surg + obstruct + differentiation + node4+
local_spread, data = df)
# Treatment
m2a <- coxph(Surv(time, status) ~ rx, data = df) # significant
summary(m2a)Call:
coxph(formula = Surv(time, status) ~ rx, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.02664 0.97371 0.11030 -0.241 0.80917
rxLev+5FU -0.37171 0.68955 0.11875 -3.130 0.00175 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9737 1.027 0.7844 1.2087
rxLev+5FU 0.6896 1.450 0.5464 0.8703
Concordance= 0.536 (se = 0.013 )
Likelihood ratio test= 12.15 on 2 df, p=0.002
Wald test = 11.56 on 2 df, p=0.003
Score (logrank) test = 11.68 on 2 df, p=0.003
# Demographics
m2b <- coxph(Surv(time, status) ~ age + sex, data = df) # not significant
summary(m2b)Call:
coxph(formula = Surv(time, status) ~ age + sex, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
age 0.001951 1.001952 0.004031 0.484 0.628
sex 0.012955 1.013040 0.094205 0.138 0.891
exp(coef) exp(-coef) lower .95 upper .95
age 1.002 0.9981 0.9941 1.010
sex 1.013 0.9871 0.8422 1.218
Concordance= 0.511 (se = 0.014 )
Likelihood ratio test= 0.26 on 2 df, p=0.9
Wald test = 0.25 on 2 df, p=0.9
Score (logrank) test = 0.25 on 2 df, p=0.9
# cancer characteristics
m2c <- coxph(Surv(time, status) ~ perfor + adhere + obstruct, data = df) # adhere and obstruct are significant
summary(m2c)Call:
coxph(formula = Surv(time, status) ~ perfor + adhere + obstruct,
data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
perfor -0.03415 0.96643 0.26932 -0.127 0.8991
adhere 0.31011 1.36358 0.12572 2.467 0.0136 *
obstruct 0.25794 1.29426 0.11547 2.234 0.0255 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
perfor 0.9664 1.0347 0.5701 1.638
adhere 1.3636 0.7334 1.0658 1.745
obstruct 1.2943 0.7726 1.0321 1.623
Concordance= 0.536 (se = 0.012 )
Likelihood ratio test= 10.82 on 3 df, p=0.01
Wald test = 11.51 on 3 df, p=0.009
Score (logrank) test = 11.6 on 3 df, p=0.009
# Differentiation of tumor
m2d <- coxph(Surv(time, status) ~ differentiation, data = df) # significant
summary(m2d)Call:
coxph(formula = Surv(time, status) ~ differentiation, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
differentiationpoor 0.48265 1.62036 0.12040 4.009 6.1e-05 ***
differentiationwell -0.05027 0.95097 0.16408 -0.306 0.759
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
differentiationpoor 1.620 0.6171 1.2798 2.052
differentiationwell 0.951 1.0516 0.6895 1.312
Concordance= 0.544 (se = 0.012 )
Likelihood ratio test= 15.34 on 2 df, p=5e-04
Wald test = 16.97 on 2 df, p=2e-04
Score (logrank) test = 17.31 on 2 df, p=2e-04
# Extent of local spread
m2e <- coxph(Surv(time, status) ~ local_spread, data = df) # significant
summary(m2e)Call:
coxph(formula = Surv(time, status) ~ local_spread, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
local_spreadmuscle -1.0892 0.3365 0.2498 -4.361 1.29e-05 ***
local_spreadserosa -0.5043 0.6039 0.1927 -2.617 0.00888 **
local_spreadsubmucosa -1.7283 0.1776 0.5336 -3.239 0.00120 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
local_spreadmuscle 0.3365 2.972 0.2062 0.5490
local_spreadserosa 0.6039 1.656 0.4139 0.8811
local_spreadsubmucosa 0.1776 5.631 0.0624 0.5054
Concordance= 0.552 (se = 0.009 )
Likelihood ratio test= 29.21 on 3 df, p=2e-06
Wald test = 25.35 on 3 df, p=1e-05
Score (logrank) test = 26.94 on 3 df, p=6e-06
# short time from surgery to registration
m2f <- coxph(Surv(time, status) ~ surg, data = df) # significant
summary(m2f)Call:
coxph(formula = Surv(time, status) ~ surg, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
surg 0.2333 1.2627 0.1026 2.274 0.0229 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
surg 1.263 0.7919 1.033 1.544
Concordance= 0.523 (se = 0.011 )
Likelihood ratio test= 5.01 on 1 df, p=0.03
Wald test = 5.17 on 1 df, p=0.02
Score (logrank) test = 5.2 on 1 df, p=0.02
# include predictors significant in the model which included all predictors (m2)
m3 <- coxph(Surv(time, status) ~ rx + surg + obstruct + differentiation + node4
+ local_spread, data = df)
summary(m3)Call:
coxph(formula = Surv(time, status) ~ rx + surg + obstruct + differentiation +
node4 + local_spread, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.006658 0.993365 0.111374 -0.060 0.952333
rxLev+5FU -0.371547 0.689666 0.119513 -3.109 0.001878 **
surg 0.244822 1.277393 0.103165 2.373 0.017639 *
obstruct 0.255151 1.290656 0.115179 2.215 0.026743 *
differentiationpoor 0.381493 1.464469 0.121454 3.141 0.001683 **
differentiationwell 0.062221 1.064197 0.165705 0.375 0.707295
node4 0.908394 2.480335 0.097820 9.286 < 2e-16 ***
local_spreadmuscle -0.977922 0.376092 0.251726 -3.885 0.000102 ***
local_spreadserosa -0.490330 0.612424 0.193922 -2.528 0.011456 *
local_spreadsubmucosa -1.337841 0.262411 0.536169 -2.495 0.012589 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9934 1.0067 0.79856 1.2357
rxLev+5FU 0.6897 1.4500 0.54564 0.8717
surg 1.2774 0.7828 1.04354 1.5636
obstruct 1.2907 0.7748 1.02984 1.6175
differentiationpoor 1.4645 0.6828 1.15425 1.8581
differentiationwell 1.0642 0.9397 0.76908 1.4726
node4 2.4803 0.4032 2.04760 3.0045
local_spreadmuscle 0.3761 2.6589 0.22963 0.6160
local_spreadserosa 0.6124 1.6329 0.41878 0.8956
local_spreadsubmucosa 0.2624 3.8108 0.09175 0.7505
Concordance= 0.669 (se = 0.012 )
Likelihood ratio test= 141.8 on 10 df, p=<2e-16
Wald test = 145.2 on 10 df, p=<2e-16
Score (logrank) test = 156 on 10 df, p=<2e-16
c_index_m3 <- concordance(m3)
cat("Concordance of the multivariate model2:",c_index_m3$concordance)Concordance of the multivariate model2: 0.6691634
anova(m0, m1, m2, m2a, m2b, m2c, m2d, m2e, m2f, m3)Analysis of Deviance Table
Cox model: response is Surv(time, status)
Model 1: ~ 1
Model 2: ~ rx
Model 3: ~ rx + age + sex + perfor + adhere + surg + obstruct + differentiation + node4 + local_spread
Model 4: ~ rx
Model 5: ~ age + sex
Model 6: ~ perfor + adhere + obstruct
Model 7: ~ differentiation
Model 8: ~ local_spread
Model 9: ~ surg
Model 10: ~ rx + surg + obstruct + differentiation + node4 + local_spread
loglik Chisq Df Pr(>|Chi|)
1 -2930.2
2 -2924.1 12.148 2 0.0023022 **
3 -2856.7 134.834 12 < 2.2e-16 ***
4 -2924.1 134.834 12 < 2.2e-16 ***
5 -2930.1 11.893 0 < 2.2e-16 ***
6 -2924.8 10.566 1 0.0011516 **
7 -2922.5 4.520 1 0.0335011 *
8 -2915.6 13.871 1 0.0001958 ***
9 -2927.7 24.205 2 5.545e-06 ***
10 -2859.3 136.757 9 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Insight: rx, adhere, surg, obstruct, differentiation and local_spread are significant predictors. However, the model concordance is low (~0.5) when each was included separately. Model m2, which included all predictors, followed by model 3 (including selected significant predictors) have the highest concordance.
Perform Stepwise variable selection:
library(MASS) # for stepwise regression
#### Use the MASS package stepAIC() function for stepwise selection by using AIC (Akaike Information Criterion) as the measure to add or remove predictors from the model.
# model including all variables
m2 <- coxph(Surv(time, status) ~ rx+ age + sex + perfor + adhere + surg + obstruct + differentiation + node4+
local_spread, data = df)
# stepwise selection
stepwise_model <- stepAIC(m2, direction = "both")Start: AIC=5741.4
Surv(time, status) ~ rx + age + sex + perfor + adhere + surg +
obstruct + differentiation + node4 + local_spread
Df AIC
- perfor 1 5739.4
- sex 1 5739.6
- adhere 1 5741.0
<none> 5741.4
- age 1 5742.5
- surg 1 5744.5
- obstruct 1 5745.1
- differentiation 2 5745.4
- rx 2 5749.9
- local_spread 3 5753.1
- node4 1 5822.0
Step: AIC=5739.4
Surv(time, status) ~ rx + age + sex + adhere + surg + obstruct +
differentiation + node4 + local_spread
Df AIC
- sex 1 5737.6
- adhere 1 5739.1
<none> 5739.4
- age 1 5740.5
+ perfor 1 5741.4
- surg 1 5742.5
- obstruct 1 5743.2
- differentiation 2 5743.5
- rx 2 5747.9
- local_spread 3 5751.1
- node4 1 5820.1
Step: AIC=5737.59
Surv(time, status) ~ rx + age + adhere + surg + obstruct + differentiation +
node4 + local_spread
Df AIC
- adhere 1 5737.3
<none> 5737.6
- age 1 5738.6
+ sex 1 5739.4
+ perfor 1 5739.6
- surg 1 5740.7
- obstruct 1 5741.2
- differentiation 2 5741.7
- rx 2 5746.2
- local_spread 3 5749.3
- node4 1 5818.2
Step: AIC=5737.26
Surv(time, status) ~ rx + age + surg + obstruct + differentiation +
node4 + local_spread
Df AIC
<none> 5737.3
+ adhere 1 5737.6
- age 1 5738.6
+ sex 1 5739.1
+ perfor 1 5739.2
- surg 1 5740.7
- obstruct 1 5740.9
- differentiation 2 5742.1
- rx 2 5746.0
- local_spread 3 5750.4
- node4 1 5817.4
summary(stepwise_model)Call:
coxph(formula = Surv(time, status) ~ rx + age + surg + obstruct +
differentiation + node4 + local_spread, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.010789 0.989269 0.111379 -0.097 0.92283
rxLev+5FU -0.375746 0.686776 0.119575 -3.142 0.00168 **
age 0.007366 1.007393 0.004051 1.818 0.06901 .
surg 0.243871 1.276179 0.103202 2.363 0.01813 *
obstruct 0.283165 1.327324 0.116138 2.438 0.01476 *
differentiationpoor 0.373783 1.453222 0.121599 3.074 0.00211 **
differentiationwell 0.069022 1.071459 0.165690 0.417 0.67699
node4 0.929854 2.534140 0.098507 9.440 < 2e-16 ***
local_spreadmuscle -0.995556 0.369518 0.252106 -3.949 7.85e-05 ***
local_spreadserosa -0.501169 0.605822 0.194154 -2.581 0.00984 **
local_spreadsubmucosa -1.322018 0.266597 0.536289 -2.465 0.01370 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9893 1.0108 0.79526 1.2306
rxLev+5FU 0.6868 1.4561 0.54329 0.8682
age 1.0074 0.9927 0.99943 1.0154
surg 1.2762 0.7836 1.04248 1.5623
obstruct 1.3273 0.7534 1.05711 1.6666
differentiationpoor 1.4532 0.6881 1.14506 1.8443
differentiationwell 1.0715 0.9333 0.77436 1.4826
node4 2.5341 0.3946 2.08921 3.0738
local_spreadmuscle 0.3695 2.7062 0.22545 0.6057
local_spreadserosa 0.6058 1.6506 0.41408 0.8864
local_spreadsubmucosa 0.2666 3.7510 0.09319 0.7627
Concordance= 0.672 (se = 0.012 )
Likelihood ratio test= 145.1 on 11 df, p=<2e-16
Wald test = 149.1 on 11 df, p=<2e-16
Score (logrank) test = 159.7 on 11 df, p=<2e-16
summary_table <- tbl_regression(m2 , exponentiate = TRUE)
# Print the summary table
summary_table| Characteristic | HR1 | 95% CI1 | p-value |
|---|---|---|---|
| rx | |||
| Obs | — | — | |
| Lev | 0.98 | 0.79, 1.22 | 0.9 |
| Lev+5FU | 0.69 | 0.54, 0.87 | 0.002 |
| age | 1.01 | 1.00, 1.02 | 0.083 |
| sex | 1.04 | 0.86, 1.26 | 0.7 |
| perfor | 1.00 | 0.59, 1.70 | >0.9 |
| adhere | 1.18 | 0.92, 1.53 | 0.2 |
| surg | 1.27 | 1.03, 1.55 | 0.022 |
| obstruct | 1.33 | 1.06, 1.68 | 0.015 |
| differentiation | |||
| moderate | — | — | |
| poor | 1.43 | 1.13, 1.82 | 0.003 |
| well | 1.08 | 0.78, 1.50 | 0.6 |
| node4 | 2.55 | 2.10, 3.09 | <0.001 |
| local_spread | |||
| contiguous | — | — | |
| muscle | 0.39 | 0.23, 0.64 | <0.001 |
| serosa | 0.64 | 0.43, 0.94 | 0.023 |
| submucosa | 0.29 | 0.10, 0.83 | 0.021 |
| 1 HR = Hazard Ratio, CI = Confidence Interval | |||
# Extract the coefficients of the selected model
selected_variables <- coef(stepwise_model)
# Print the selected variables
print(selected_variables) rxLev rxLev+5FU age
-0.010789186 -0.375746378 0.007365529
surg obstruct differentiationpoor
0.243870576 0.283164609 0.373782987
differentiationwell node4 local_spreadmuscle
0.069021620 0.929854405 -0.995556083
local_spreadserosa local_spreadsubmucosa
-0.501168855 -1.322018421
# Multivariate model including variables selected based on stepwise variable selection. The same variables were significant based on anova test of the model that included all variables.
m4 <- coxph(Surv(time, status) ~ rx + age + surg + obstruct +
differentiation + node4 + local_spread, data = df)
summary(m4)Call:
coxph(formula = Surv(time, status) ~ rx + age + surg + obstruct +
differentiation + node4 + local_spread, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.010789 0.989269 0.111379 -0.097 0.92283
rxLev+5FU -0.375746 0.686776 0.119575 -3.142 0.00168 **
age 0.007366 1.007393 0.004051 1.818 0.06901 .
surg 0.243871 1.276179 0.103202 2.363 0.01813 *
obstruct 0.283165 1.327324 0.116138 2.438 0.01476 *
differentiationpoor 0.373783 1.453222 0.121599 3.074 0.00211 **
differentiationwell 0.069022 1.071459 0.165690 0.417 0.67699
node4 0.929854 2.534140 0.098507 9.440 < 2e-16 ***
local_spreadmuscle -0.995556 0.369518 0.252106 -3.949 7.85e-05 ***
local_spreadserosa -0.501169 0.605822 0.194154 -2.581 0.00984 **
local_spreadsubmucosa -1.322018 0.266597 0.536289 -2.465 0.01370 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9893 1.0108 0.79526 1.2306
rxLev+5FU 0.6868 1.4561 0.54329 0.8682
age 1.0074 0.9927 0.99943 1.0154
surg 1.2762 0.7836 1.04248 1.5623
obstruct 1.3273 0.7534 1.05711 1.6666
differentiationpoor 1.4532 0.6881 1.14506 1.8443
differentiationwell 1.0715 0.9333 0.77436 1.4826
node4 2.5341 0.3946 2.08921 3.0738
local_spreadmuscle 0.3695 2.7062 0.22545 0.6057
local_spreadserosa 0.6058 1.6506 0.41408 0.8864
local_spreadsubmucosa 0.2666 3.7510 0.09319 0.7627
Concordance= 0.672 (se = 0.012 )
Likelihood ratio test= 145.1 on 11 df, p=<2e-16
Wald test = 149.1 on 11 df, p=<2e-16
Score (logrank) test = 159.7 on 11 df, p=<2e-16
anova(m4)Analysis of Deviance Table
Cox model: response is Surv(time, status)
Terms added sequentially (first to last)
loglik Chisq Df Pr(>|Chi|)
NULL -2930.2
rx -2924.1 12.1478 2 0.0023022 **
age -2923.9 0.3443 1 0.5573434
surg -2921.7 4.5404 1 0.0331029 *
obstruct -2919.3 4.7876 1 0.0286648 *
differentiation -2911.3 15.9492 2 0.0003441 ***
node4 -2867.2 88.1915 1 < 2.2e-16 ***
local_spread -2857.6 19.1615 3 0.0002532 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cox_summary <- tidy(m4)
cox_summary# A tibble: 11 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 rxLev -0.0108 0.111 -0.0969 9.23e- 1
2 rxLev+5FU -0.376 0.120 -3.14 1.68e- 3
3 age 0.00737 0.00405 1.82 6.90e- 2
4 surg 0.244 0.103 2.36 1.81e- 2
5 obstruct 0.283 0.116 2.44 1.48e- 2
6 differentiationpoor 0.374 0.122 3.07 2.11e- 3
7 differentiationwell 0.0690 0.166 0.417 6.77e- 1
8 node4 0.930 0.0985 9.44 3.74e-21
9 local_spreadmuscle -0.996 0.252 -3.95 7.85e- 5
10 local_spreadserosa -0.501 0.194 -2.58 9.84e- 3
11 local_spreadsubmucosa -1.32 0.536 -2.47 1.37e- 2
c_index_m4 <- concordance(m4)
cat("Concordance of the model with multivariate stepwise v_select:",c_index_m4$concordance)Concordance of the model with multivariate stepwise v_select: 0.6718376
Test whether Proportional hazard assumptions are met for model 4 predictors
cox.zph(m4) # final model with stepwise variable selection chisq df p
rx 2.3352 2 0.31111
age 0.5487 1 0.45885
surg 0.0197 1 0.88827
obstruct 6.1477 1 0.01316
differentiation 17.4588 2 0.00016
node4 5.6618 1 0.01734
local_spread 7.0826 3 0.06931
GLOBAL 37.5250 11 9.4e-05
zph_test <- cox.zph(m4)
print(zph_test) chisq df p
rx 2.3352 2 0.31111
age 0.5487 1 0.45885
surg 0.0197 1 0.88827
obstruct 6.1477 1 0.01316
differentiation 17.4588 2 0.00016
node4 5.6618 1 0.01734
local_spread 7.0826 3 0.06931
GLOBAL 37.5250 11 9.4e-05
# plot the Schoenfeld residuals
plot(zph_test)Insight: Differentiation, node4 and obstruct variables did not meet proportional hazards assumption.
Stratify model by variables violating roportional hazard assumption
m5 <- coxph(Surv(time, status) ~ rx + age + surg + strata(obstruct) + strata(differentiation) + node4 +
local_spread, data = df)
summary(m5)Call:
coxph(formula = Surv(time, status) ~ rx + age + surg + strata(obstruct) +
strata(differentiation) + node4 + local_spread, data = df)
n= 929, number of events= 452
coef exp(coef) se(coef) z Pr(>|z|)
rxLev -0.019015 0.981164 0.111678 -0.170 0.86480
rxLev+5FU -0.349150 0.705288 0.119460 -2.923 0.00347 **
age 0.008636 1.008674 0.004074 2.120 0.03400 *
surg 0.258591 1.295104 0.103252 2.504 0.01226 *
node4 0.917350 2.502650 0.099125 9.255 < 2e-16 ***
local_spreadmuscle -1.067692 0.343801 0.252445 -4.229 2.34e-05 ***
local_spreadserosa -0.552546 0.575483 0.194432 -2.842 0.00449 **
local_spreadsubmucosa -1.445837 0.235549 0.536904 -2.693 0.00708 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
rxLev 0.9812 1.0192 0.78828 1.2212
rxLev+5FU 0.7053 1.4179 0.55806 0.8914
age 1.0087 0.9914 1.00065 1.0168
surg 1.2951 0.7721 1.05783 1.5856
node4 2.5027 0.3996 2.06075 3.0393
local_spreadmuscle 0.3438 2.9087 0.20962 0.5639
local_spreadserosa 0.5755 1.7377 0.39313 0.8424
local_spreadsubmucosa 0.2355 4.2454 0.08224 0.6747
Concordance= 0.674 (se = 0.015 )
Likelihood ratio test= 122.9 on 8 df, p=<2e-16
Wald test = 126.4 on 8 df, p=<2e-16
Score (logrank) test = 133.9 on 8 df, p=<2e-16
cox.zph(m5) # final model with stratification by variables violating proportional hazard assumption chisq df p
rx 2.0007 2 0.368
age 0.6704 1 0.413
surg 0.0142 1 0.905
node4 4.2882 1 0.038
local_spread 5.2976 3 0.151
GLOBAL 12.4113 8 0.134
zph_test <- cox.zph(m5)
print(zph_test) chisq df p
rx 2.0007 2 0.368
age 0.6704 1 0.413
surg 0.0142 1 0.905
node4 4.2882 1 0.038
local_spread 5.2976 3 0.151
GLOBAL 12.4113 8 0.134
# plot the Schoenfeld residuals
plot(zph_test)Insight: After model stratification by obstruct and differentiation, the proportional hazard assumption is met as the global p >0.05. Node4 slightly violates assumption, but the final model is not stratified by node 4 because the model concordance is attenuated when stratifying by node4.
Model comparision
library(survival)
library(dplyr)
library(knitr)
library(kableExtra)
# Fit the models and store them in a list
models <- list(m0, m1, m2, m3, m4)
# Add descriptions for each model
descriptions <- c(
"Model 0 - Base model",
"Model 1 - Treatment",
"Model 2 - Full variables",
"Model 3 - Selected stepwisely",
"Model 4 - model 4 Stratified"
)
# Create a data frame to store results
results <- data.frame(
Model = character(),
Description = character(),
AIC = numeric(),
BIC = numeric(),
C_Index = numeric(),
stringsAsFactors = FALSE
)
# Function to calculate and store metrics for each model
for (i in seq_along(models)) {
model <- models[[i]]
# Extract AIC and BIC
aic <- AIC(model)
bic <- BIC(model)
# Add C-index
c_index <- concordance(model)$concordance
# Append results to the data frame
results <- rbind(results, data.frame(
Model = paste("Model", i),
Description = descriptions[i],
AIC = aic,
BIC = bic,
C_Index = round(c_index, 3)
))
}
# Print the table using kable and kableExtra
results %>%
kbl(caption = "Model Evaluation Metrics") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE) %>%
column_spec(2:5, width = "10em") %>%
kable_styling(position = "center")| Model | Description | AIC | BIC | C_Index |
|---|---|---|---|---|
| Model 1 | Model 0 - Base model | 5860.383 | 5860.383 | 0.500 |
| Model 2 | Model 1 - Treatment | 5852.236 | 5860.463 | 0.536 |
| Model 3 | Model 2 - Full variables | 5741.401 | 5798.993 | 0.674 |
| Model 4 | Model 3 - Selected stepwisely | 5738.620 | 5779.757 | 0.669 |
| Model 5 | Model 4 - model 4 Stratified | 5737.261 | 5782.511 | 0.672 |
K-fold cross validation
library(survival)
library(boot) # for bootstrapping
library(survcomp) # to calculate c-index
library(caret)
set.seed(1234)
# Cox model
cox_model <- coxph(Surv(time, status) ~ rx + age + surg + strata(obstruct) + strata(differentiation) + node4 +
local_spread, data = df)
# calculate the original c-index
c_index_original <- concordance(cox_model)
cat("original c-index:", c_index_original$concordance, "\n")original c-index: 0.674316
# create a function for calculating c-index in each fold using concordance()
cox_cindex <- function(train_data, test_data) {
fit <- coxph(Surv(time, status) ~ rx + age + surg + strata(obstruct) + strata(differentiation) + node4 + local_spread, data = train_data)
# Calculate concordance on test data
c_index <- concordance(fit, newdata = test_data)$concordance
return(c_index)
}
# perform 5-fold cross-validation with stratification
K <- 5
folds <- createFolds(c(df$status, df$differentiation, df$rx), k = K, list = TRUE, returnTrain = TRUE)
cv_c_indices <- sapply(folds, function(train_indices) {
train_data <- df[train_indices, ]
test_data <- df[-train_indices, ]
cox_cindex(train_data, test_data) # use the concordance function inside cox_cindex
})
# Print cross-validated c-indices
print(cv_c_indices) Fold1 Fold2 Fold3 Fold4 Fold5
0.7156051 0.6605045 0.6562016 0.6477106 0.6943820
# cross-validation c-indices
cat("cross-validated c-Indices for each fold:", cv_c_indices, "\n")cross-validated c-Indices for each fold: 0.7156051 0.6605045 0.6562016 0.6477106 0.694382
cat("mean cross-validated c-Index:", mean(cv_c_indices), "\n")mean cross-validated c-Index: 0.6748808
# plot cross-validation c-indices
plot(cv_c_indices, type = "b", xlab = "Fold", ylab = "c-index", main = "c-index across folds")Insight: The original model c-index (0.674) and mean cross-validation c-index (0.675) is very similar, suggesting the the final stratified model is stable and is not overfitting.
V. Results
Table 2. Univariate model: Survival after Chemotherapy for stage B/C Colon Cancer
| Treatment | Coefficient | Hazard ratio | 95% CI_upper | 95% CI_lower | P-value |
|---|---|---|---|---|---|
| Amisole (Lev) | -0.027 | 0.974 | 0.784 | 1.209 | 0.809 |
| Amisole + 5-FU | -0.372 | 0.690 | 0.546 | 0.870 | 0.002 |
Table 3. Multivariate model: Survival after Chemotherapy for stage B/C Colon Cancer
| Treatment | Coefficient | Hazard ratio | 95% CI_upper | 95% CI_lower | P-value |
|---|---|---|---|---|---|
| Amisole (Lev) | -0.011 | 0.989 | 0.795 | 1.231 | 0.923 |
| Amisole + 5-FU | -0.376 | 0.687 | 0.543 | 0.868 | 0.002 |
| Age | 0.007 | 1.007 | 0.999 | 1.015 | 0.069 |
| Surge | 0.244 | 1.276 | 1.042 | 1.562 | 0.018 |
| Obstruction of colon | 0.283 | 1.327 | 1.057 | 1.667 | 0.015 |
| Differentiat ion_poor | 0.374 | 1.453 | 1.145 | 1.844 | 0.002 |
| Differentiat ion_well | 0.069 | 1.072 | 0.774 | 1.483 | 0.677 |
| More than 4 nodes (+) | 0.930 | 2.534 | 2.089 | 3.074 | 3.75 x 10-21 |
| Local spread_muscle | -0.996 | 0.370 | 0.225 | 0.606 | 7.85 x 10-5 |
| Local spread_serosa | -0.501 | 0.606 | 0.414 | 0.886 | 0.010 |
| Local spread_submucosa | -1.322 | 0.267 | 0.093 | 0.763 | 0.014 |